package LayerD.CodeDOM;

import java.util.*;
import java.io.*;

/**
 * This class, whose name is an acronym for "Trivial API for XML", is a 
 * container for a simple Parser class for parsing XML and its related Token, 
 * TokenType and ParseException classes and constants.
 * 
 * TAX.Parser is a simple, lightweight pull-parser that is useful for a variety
 * of simple XML parsing tasks. Note, however, that it is more of a tokenizer
 * than a true parser and that the grammar it parses is not actually XML, but a
 * simplified subset of XML. The parser has (at least) these limitations:
 *
 *   It does not enforce well-formedness. For example, it does not require
 *      tags to be properly nested.
 *   It is not a validating parser, and does not read external DTDs
 *   It does not parse the internal subset of the DOCTYPE tag, and cannot
 *      recognize any entities defined there.
 *   It is not namespace-aware
 *   It does not handle entity or character references in attribute values,
 *      not even pre-defined entities such as {@code &quot;}
 *   It strips all whitespace from the start and end of document text, which,
 *      while useful for many documents, is not generally correct.
 *   It makes no attempt to do error recovery.  The results of calling next( )
 *      after a ParseException is thrown are undefined.
 *   It does not provide enough detail to reconstruct the source document
 * 
 * TAX.Parser always replaces entity references with their values, or throws
 * a Tax.ParseException if no replacement value is known.  The parser coalesces
 * adjacent text and entities into a single TEXT token. CDATA sections are
 * also returned as TEXT tokens, but are not coalesced.
 **/
	
public class XplParser {
    // Enumerated type return values for Token.type( ).
    //
    // NOTE: declaration order matters. Each TokenType takes the next value of
    // a running counter as its ordinal when it is constructed, and Parser
    // uses that ordinal to index its seven-element returnTokenType array, so
    // these seven constants get ordinals 0 through 6 in the order written.

    // Element start tag (also used for empty-element tags ending in "/>")
    public static final TokenType TAG = new TokenType("TAG");
    // Element end tag
    public static final TokenType ENDTAG = new TokenType("ENDTAG");
    // Character data (with entity references replaced) or a CDATA section
    public static final TokenType TEXT = new TokenType("TEXT");
    // Comment
    public static final TokenType COMMENT = new TokenType("COMMENT");
    // Processing instruction other than the XML declaration
    public static final TokenType PI = new TokenType("PI");
    // DOCTYPE declaration. NOTE(review): nothing in this file currently
    // creates a token of this type; the DOCTYPE parsing call is commented out.
    public static final TokenType DOCTYPE = new TokenType("DOCTYPE");
    // The "<?xml ... ?>" declaration
    public static final TokenType XMLDECL = new TokenType("XMLDECL");

    // A type-safe enumeration for token types. The constructor is private, so
    // the only instances are the constants declared by the enclosing class.
    public static class TokenType {
        // Number of instances created so far; the next instance's ordinal.
        private static int nextOrdinal = 0;

        // Display name, returned by toString( )
        private final String name;
        // Position of this instance in creation order, starting at 0
        private final int ordinal;

        private TokenType(String name) {
            this.name = name;
            this.ordinal = nextOrdinal++;
        }

        // Returns the name this token type was created with.
        public String toString( ) {
            return name;
        }
    }

    // A Token is what Parser.next( ) hands back: it records what kind of
    // construct was parsed, its text, and the position where it started.
    public static class Token {
        TokenType type;    // One of the TokenType constants of XplParser
        String text;       // Tag name for TAG, ENDTAG & XMLDECL;
                           // text without delimiters for the other types
        int line, column;  // Where the token starts
        Map attributes;    // name -> value for TAG and XMLDECL, else null
        boolean empty;     // true for XMLDECL and for TAGs ending in "/>"

        // Full constructor, used for TAG and XMLDECL tokens.
        Token(TokenType t, String s, int l,int c,Map  a,boolean e) {
            type = t;
            text = s;
            line = l;
            column = c;
            attributes = a;
            empty = e;
        }

        // Constructor for every other token type: there are no attributes
        // and the token is not flagged as empty.
        Token(TokenType type, String text, int line, int column) {
            this(type, text, line, column, null, false);
        }

        // Read-only accessors
        public TokenType type( ) {
            return type;
        }

        public String text( ) {
            return text;
        }

        public int line( ) {
            return line;
        }

        public int column( ) {
            return column;
        }

        public Map attributes( ) {
            return attributes;
        }

        public boolean empty( ) {
            return empty;
        }
    }

    // Thrown by the parser when it meets a syntax error or an entity
    // reference it has no replacement for.
    public static class ParseException extends Exception {
        public ParseException(String msg) {
            super(msg);
        }

        // Factory for "Expected X at line L, column C" errors, where the
        // position is taken from the given token.
        static ParseException expected(Token t, String expected) {
            StringBuffer message = new StringBuffer("Expected ");
            message.append(expected);
            message.append(" at line ").append(t.line( ));
            message.append(", column ").append(t.column( ));
            return new ParseException(message.toString( ));
        }
    }

    // This is the parser class. It relies internally on a Tokenizer.
    // The public constructors allow you to parse XML from a Reader or from
    // a file identified by name.  By default, it will return tokens of type
    // TAG, ENDTAG, and TEXT, and will ignore all others.  You can change this
    // behavior by passing token type constants to returnTokens( ) or
    // ignoreTokens( ).  By default the parser will replace character entities
    // and the pre-defined entities &amp;, &lt;, &gt;, &quot;, and &apos; with
    // their values.  No method for defining additional entities is provided,
    // so any other entity reference is reported as a syntax error.  The
    // configuration methods return the Parser object so calls can be
    // chained.  After configuring your Parser, call the next( ) method
    // repeatedly until it returns null.
    public static class Parser {
        XplTokenizer tokenizer;   // Used to break up the input
        Map entityMap;         // Map entity name to replacement
        // Name of the file being parsed. Only the Parser(String) constructor
        // sets it; when non-null, syntax( ) includes it in error messages.
        String fileName;
        // Should we return tokens of these types? Indexed by
        // TokenType.ordinal, one slot per TokenType constant; slots start out
        // false and the constructor switches on TAG, ENDTAG and TEXT.
        boolean[  ] returnTokenType = new boolean[7];

        /**
         * Creates a parser that reads its XML input from the given Reader.
         * The reader is wrapped in an XplReaderTokenizer; no file name is
         * recorded, so error messages will not mention one.
         */
        public Parser(Reader in) {
            this(new XplReaderTokenizer(in));
        }
        
        /**
         * Creates a parser that reads its XML input from the named file and
         * remembers the name so that syntax errors can report it.
         *
         * NOTE(review): the file is opened with FileReader(String), which
         * decodes with the platform default charset on older Java versions,
         * and nothing visible in this class closes the reader -- confirm
         * whether callers need an explicit encoding or a way to close it.
         *
         * @throws FileNotFoundException if the file cannot be opened
         */
        public Parser(String fileName) throws FileNotFoundException {
            this(new FileReader(fileName));
            this.fileName = fileName;
        }

        /**
         * Common constructor: configures the tokenizer, installs the
         * pre-defined entities and selects the token types that next( )
         * returns by default (TAG, ENDTAG and TEXT).
         */
        Parser(XplTokenizer tokenizer) {
            this.tokenizer = tokenizer;

            // Whitespace always comes back as its own token, and line and
            // column numbers are tracked so errors can be located.
            tokenizer.tokenizeSpaces(true);
            tokenizer.trackPosition(true);

            // Word tokenizing is only switched on while markup is being
            // parsed, but when it is on, this decides what forms a word.
            tokenizer.wordRecognizer(new XplTokenizer.WordRecognizer( ) {
                    public boolean isWordStart(char c) {
                        if (c == '_' || c == ':') return true;
                        return Character.isLetter(c);
                    }

                    public boolean isWordPart(char c, char first) {
                        if (Character.isLetterOrDigit(c)) return true;
                        switch (c) {
                        case '_':
                        case '-':
                        case '.':
                        case ':':
                            return true;
                        }
                        // Otherwise accept combining marks and modifiers
                        switch (Character.getType(c)) {
                        case Character.COMBINING_SPACING_MARK:
                        case Character.ENCLOSING_MARK:
                        case Character.NON_SPACING_MARK:
                        case Character.MODIFIER_LETTER:
                            return true;
                        default:
                            return false;
                        }
                    }
                });

            // The five pre-defined entities and their replacement text
            Map predefined = new HashMap( );
            predefined.put("lt", "<");
            predefined.put("gt", ">");
            predefined.put("amp", "&");
            predefined.put("quot", "\"");
            predefined.put("apos", "'");
            entityMap = predefined;

            // Token types handed back by next( ) unless reconfigured
            TokenType[] returnedByDefault = { TAG, ENDTAG, TEXT };
            for (int i = 0; i < returnedByDefault.length; i++) {
                returnTokenType[returnedByDefault[i].ordinal] = true;
            }
        }

       /**
        * Asks next( ) to return tokens of the given type from now on.
        *
        * @param t the token type to start returning
        * @return this parser, so configuration calls can be chained
        */
       public Parser returnTokens(TokenType t) {
           final int slot = t.ordinal;
           returnTokenType[slot] = true;
           return this;
       }

        /**
         * Asks next( ) to silently skip tokens of the given type from now on.
         *
         * @param t the token type to stop returning
         * @return this parser, so configuration calls can be chained
         */
        public Parser ignoreTokens(TokenType t) {
            final int slot = t.ordinal;
            returnTokenType[slot] = false;
            return this;
        }

        /**
         * Reports a parsing error. This method never returns normally: it
         * always throws a ParseException whose message contains the given
         * text, the tokenizer's current line and column and, when the parser
         * was created from a file name, that file name.
         *
         * @param msg description of what went wrong
         * @throws ParseException always
         */
        void syntax(String msg) throws ParseException {
            String prefix = (fileName == null)
                ? "Syntax error: "
                : "Syntax error in file " + fileName + ": ";
            throw new ParseException(prefix + msg
                                     + " at line:" + tokenizer.tokenLine( )
                                     + ", column:" + tokenizer.tokenColumn( ));
        }

        /**
         * Returns the next XML token that the parser is configured to
         * return, or null when the input is exhausted. Tokens whose type is
         * currently ignored are parsed and discarded.
         *
         * Invariant: the tokenizer always rests on the first unparsed token.
         * Each parse step therefore begins by inspecting tokenType( ) rather
         * than calling next( ), and finishes by calling next( ) to move past
         * what it consumed.
         *
         * @throws ParseException on a syntax error or an unknown entity
         * @throws IOException if the tokenizer fails to read the input
         */
        public Token next( ) throws ParseException, IOException {
            while (true) {
                int current = tokenizer.tokenType( );

                // A fresh tokenizer has not read anything yet: prime it.
                if (current == XplTokenizer.BOF) {
                    current = tokenizer.next( );
                }

                // End of input
                if (current == XplTokenizer.EOF) {
                    return null;
                }

                // Whitespace between tokens is skipped. Strictly speaking we
                // cannot know that it is ignorable, but that is what most
                // clients want.
                if (current == XplTokenizer.SPACE) {
                    tokenizer.next( );
                    continue;
                }

                // '<' introduces markup; anything else is character data.
                Token parsed = (current == '<') ? parseMarkup( ) : parseText( );

                // Hand it back only if its type is wanted; otherwise go on.
                if (returnTokenType[parsed.type.ordinal]) {
                    return parsed;
                }
            }
        }

        /**
         * Parses one piece of markup. Must be called while the tokenizer's
         * current token is '&lt;'. Recognizes the XML declaration, processing
         * instructions, CDATA sections (returned as TEXT), comments, end tags
         * and start tags. Word tokenizing is enabled for the duration of the
         * call and, on every exit path (including errors), it is disabled
         * again and the tokenizer is advanced one more token.
         *
         * DOCTYPE declarations are not supported. Previously the DOCTYPE
         * branch was empty, so "&lt;!DOCTYPE ...&gt;" fell through and was
         * parsed as a start tag named "DOCTYPE"; it is now rejected with an
         * explicit syntax error.
         *
         * @return the token describing the markup that was parsed
         * @throws ParseException if the markup is malformed or unsupported
         * @throws IOException if the current token is not '&lt;', or if the
         *         tokenizer fails to read the input
         */
        Token parseMarkup( ) throws ParseException, IOException {
            if(tokenizer.tokenType( ) != '<'){
                throw new IOException("Se esperaba '<'."+ tokenizer.tokenLine());
            }
            try {
                // Turn on word tokenizing. It is turned off in finally clause.
                tokenizer.tokenizeWords(true);
                int t = tokenizer.next( );

                if (t == '?') {    // Markup is a PI or XMLDECL
                    t = tokenizer.next( );
                    if (t != XplTokenizer.WORD) syntax("XMLDECL or PI expected");
                    if (tokenizer.tokenText( ).equals("xml")) {
                        // Arguments are evaluated left to right, so the name
                        // and position are captured before parseAttributes( )
                        // moves the tokenizer forward.
                        Token token =
                            new Token(XMLDECL, tokenizer.tokenText( ),
                                      tokenizer.tokenLine( ),
                                      tokenizer.tokenColumn( ) - 2,
                                      parseAttributes( ),
                                      true);

                        if (tokenizer.tokenType( )!='?') syntax("'?' expected");
                        if (tokenizer.next( ) != '>') syntax("'>' expected");
                        return token;
                    }
                    else {
                        Token token = new Token(PI, null,tokenizer.tokenLine( ),
                                                tokenizer.tokenColumn( )-2);
                        // Read to end of PI
                        tokenizer.scan("?>", true, true, false, true);
                        token.text = tokenizer.tokenText( );
                        return token;
                    }
                }

                if (t == '!') {         // Markup is DOCTYPE, CDATA, or Comment
                    t = tokenizer.next( );
                    if (t == XplTokenizer.WORD &&
                        tokenizer.tokenText( ).equals("DOCTYPE")) {
                        // Fail loudly instead of falling through to the
                        // start-tag branch below with t still equal to WORD.
                        syntax("DOCTYPE declarations are not supported");
                    }
                    else if (t == '[') {
                        if (tokenizer.next( ) == XplTokenizer.WORD &&
                            tokenizer.tokenText( ).equals("CDATA") &&
                            tokenizer.next( ) == '[') {
                            Token token = new Token(TEXT, null,
                                                    tokenizer.tokenLine( ),
                                                    tokenizer.tokenColumn( )-8);
                            // Everything up to "]]>" is the text
                            tokenizer.scan("]]>", true, false, false, true);
                            token.text = tokenizer.tokenText( );
                            return token;
                        }
                        else syntax("CDATA expected");
                    }
                    else if (t == '-' && tokenizer.next( ) == '-') {
                        // a COMMENT token
                        Token token = new Token(COMMENT, null,
                                                tokenizer.tokenLine( ),
                                                tokenizer.tokenColumn( )-4);
                        // Everything up to "-->" is the comment text
                        tokenizer.scan("-->", true, false, false, true);
                        token.text = tokenizer.tokenText( );
                        return token;
                    }
                    else syntax("DOCTYPE, CDATA, or Comment expected");
                }

                if (t == '/') {    // Markup is an element end tag
                    t = tokenizer.next( );
                    if (t == XplTokenizer.WORD) {
                        Token token = new Token(ENDTAG, tokenizer.tokenText( ),
                                                tokenizer.tokenLine( ),
                                                tokenizer.tokenColumn( )-2);

                        // Optional space is allowed before the closing '>'
                        t = tokenizer.next( );
                        if (t == XplTokenizer.SPACE) t = tokenizer.next( );
                        if (t != '>') syntax("Expected '>'");
                        return token;
                    }
                    else syntax("ENDTAG expected.");
                }

                if (t == XplTokenizer.WORD) { // Markup is an element start tag
                    // As above: name and position are read before
                    // parseAttributes( ) runs, and the "empty" flag is read
                    // after it, when the tokenizer sits on '/' or '>'.
                    Token token = new Token(TAG, tokenizer.tokenText( ),
                                            tokenizer.tokenLine( ),
                                            tokenizer.tokenColumn( ) - 1,
                                            parseAttributes( ),
                                            tokenizer.tokenType( ) == '/');

                    if (tokenizer.tokenType( ) == '/') tokenizer.next( );
                    if (tokenizer.tokenType( ) != '>') syntax("'>' expected");
                    return token;
                }

                // If none of the above matched, this is a syntax error
                syntax("Invalid character following '<'");

                // The compiler doesn't realize that syntax( ) never returns,
                // so it requires a return statement here.
                return null;
            }
            finally {
                // restore tokenizer state
                tokenizer.tokenizeWords(false);
                // Get the next token ready
                tokenizer.next( );
            }
        }

        /**
         * Parses a sequence of name=value attributes, where each value is
         * quoted with single or double quotes, and returns them as a Map from
         * attribute name to attribute value. Used for element start tags and
         * for the XML declaration.
         *
         * On entry the tokenizer is on the element name (not on the first
         * attribute); on return it is on the first token after the last
         * attribute and any space following it. Quote tokenizing is enabled
         * for the duration of the call and always disabled again on exit.
         *
         * Optional whitespace around '=' is accepted, as the XML grammar
         * allows (Eq ::= S? '=' S?); previously it was a syntax error.
         * Entity and character references in values are not expanded, and a
         * repeated attribute name silently replaces the earlier value.
         *
         * @return Collections.EMPTY_MAP (immutable) when there are no
         *         attributes, otherwise a new HashMap of names to values
         * @throws ParseException if '=' or a quoted value is missing
         * @throws IOException if the tokenizer fails to read the input
         */
        Map parseAttributes( ) throws ParseException, IOException {
            try {
                // Adjust tokenizer to recognize quotes.
                // Defaults are restored in finally clause below
                tokenizer.quotes("'\"", "'\"");
                int t = tokenizer.next( ); // Consume the element name

                // Skip optional space
                if (t == XplTokenizer.SPACE) t = tokenizer.next( );

                // This is a special case for elements with no attributes
                if (t != XplTokenizer.WORD) return Collections.EMPTY_MAP;

                Map m = new HashMap( );  // Where we'll store attributes

                while(t == XplTokenizer.WORD) {
                    String name = tokenizer.tokenText( );  // get attribute name

                    // The next token must be '=', optionally preceded by space
                    t = tokenizer.next( );
                    if (t == XplTokenizer.SPACE) t = tokenizer.next( );
                    if (t != '=') syntax("'=' expected");

                    // Then a quoted string, optionally preceded by space
                    t = tokenizer.next( );
                    if (t == XplTokenizer.SPACE) t = tokenizer.next( );
                    if (t != '"' && t != '\'')
                        syntax("quoted attribute value expected");

                    // Map attribute name to attribute value.
                    // The tokenizer strips the quotes for us.
                    // Note that we do not handle entity references here.
                    m.put(name, tokenizer.tokenText( ));

                    // Consume the value and skip an optional space after it
                    t = tokenizer.next( );
                    if (t == XplTokenizer.SPACE) t=tokenizer.next( );
                }
                return m;
            }
            finally { // Always turn off quote tokenizing
                tokenizer.quotes("", "");
            }
        }

        /**
         * Coalesces adjacent character data and entity references into a
         * single TEXT token. CDATA sections are also returned as TEXT tokens,
         * but by parseMarkup( ), and are not coalesced here. Must be called
         * while the tokenizer's current token is something other than '&lt;'.
         *
         * Accumulation stops at the next '&lt;' or at end of input. The
         * end-of-input check is new: without it, text that ran to the end of
         * the input kept the loop going after the tokenizer reported EOF.
         *
         * @return a TEXT token with leading and trailing whitespace trimmed,
         *         positioned at the start of the text
         * @throws ParseException if the current token is '&lt;', or if an
         *         entity reference is malformed or undefined
         * @throws IOException if the tokenizer fails to read the input
         */
        Token parseText( ) throws ParseException, IOException {
            if(tokenizer.tokenType( ) == '<'){
                // The message used to claim a '<' was expected, which is the
                // opposite of the condition being reported.
                throw new ParseException("No se esperaba '<'. "+tokenizer.tokenLine());
            }
            // Save line and column info of the start of the text
            int line = tokenizer.tokenLine( );
            int column = tokenizer.tokenColumn( );
            StringBuffer b = new StringBuffer( ); // where we accumulate text

            int t;
            while((t = tokenizer.tokenType( )) != '<' &&
                  t != XplTokenizer.EOF) {
                if (t == '&') b.append(parseEntityReference( ));
                else {
                    // Otherwise we've found some text
                    tokenizer.scan("<&",  // scan until we find one of these
                       false, // just match one, not the whole string
                       true,  // extend the token we've already started
                       false, // don't include delimiter char in the token
                       false);// don't skip delimiter; save for next token
                    b.append(tokenizer.tokenText( ));
                    tokenizer.next( );
                }
            }
            // Strip surrounding space and return as a TEXT token
            return new Token(TEXT, b.toString( ).trim( ), line, column);
        }

        /**
         * Parses a reference to a general entity ("&amp;name;") or a character
         * reference ("&amp;#ddd;" decimal, "&amp;#xhhh;" hexadecimal) and returns
         * its replacement text. Must be called while the tokenizer's current
         * token is '&amp;'; on return the tokenizer is on the token following
         * the terminating ';'.
         *
         * Fixes relative to the previous version: the hexadecimal test was
         * inverted (it required the text NOT to start with 'x'), so valid
         * hex references were rejected; and code points above U+FFFF were
         * silently truncated by a cast to char.
         *
         * @return the replacement string for the reference
         * @throws ParseException if the reference is malformed, names an
         *         undefined entity, or is not terminated by ';'
         * @throws IOException if the tokenizer fails to read the input
         */
        String parseEntityReference( ) throws ParseException, IOException {
            if(tokenizer.tokenType( ) != '&'){
                throw new ParseException("Se esperaba '&'."+tokenizer.tokenLine());
            }
            String s = null;
            try {
                tokenizer.tokenizeWords(true);
                int t = tokenizer.next( );
                if (t == '#') {  // if it's a character reference
                    tokenizer.tokenizeNumbers(true);
                    t = tokenizer.next( );
                    String text = tokenizer.tokenText( );
                    if (t == XplTokenizer.NUMBER) {  // a decimal character ref
                        s = codePointToString(Integer.parseInt(text));
                    }
                    else if (t == XplTokenizer.WORD && text.charAt(0) == 'x') {
                        // a hexadecimal character reference: the leading
                        // 'x' makes the tokenizer see it as a word
                        String hex = text.substring(1);    // skip the 'x'
                        s = codePointToString(Integer.parseInt(hex, 16));
                    }
                    else syntax("illegal character following '&#'");
                }
                else { // otherwise a regular entity reference
                    if (t != XplTokenizer.WORD) syntax("entity expected");
                    // look up entity replacement
                    s = (String) entityMap.get(tokenizer.tokenText( ));
                    if (s == null) syntax("Undefined entity: '&" +
                                          tokenizer.tokenText( ) + ";'");
                }
            }
            catch (NumberFormatException e) {
                // Convert NFE errors to syntax errors
                syntax("malformed character entity");
            }
            finally {  // Restore tokenizer state
                tokenizer.tokenizeWords(false).tokenizeNumbers(false);
            }

            // Require and consume the trailing semicolon
            if (tokenizer.next( ) != ';') syntax("';' expected");
            tokenizer.next( );
            return s;
        }

        // Converts a Unicode code point from a character reference into a
        // String: one char for values up to U+FFFF, a surrogate pair above
        // that. Values outside 0..0x10FFFF are reported as syntax errors.
        private String codePointToString(int codePoint) throws ParseException {
            if (codePoint < 0 || codePoint > 0x10FFFF)
                syntax("malformed character entity");
            if (codePoint < 0x10000)
                return Character.toString((char) codePoint);
            int offset = codePoint - 0x10000;
            char high = (char) (0xD800 + (offset >> 10));
            char low = (char) (0xDC00 + (offset & 0x3FF));
            return new String(new char[ ] { high, low });
        }
    }
}

